R Markdown

# Loading Packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(bulletxtrctr)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl_init' failed, running with rgl.useNULL = TRUE
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
library(x3ptools)
library(ggplot2)
library(readr)
library(purrr)
library(stringr)
library(tidyr)
load("Ground_Truth_Data.RData") 
#For Html Purposes
#Checking scan orientation

image(Phoenix$x3p[[1]])

image(Phoenix$x3p[[10]])

image(Phoenix$x3p[[100]])

image(Phoenix$x3p[[200]])

Notes Bullets from the same barrel are considered matches. Bullets from different barrels are not considered matches. In case a bullet is unknown, we do not know whether there is a match or not.

Lands from the same bullet are considered matches if they are from the same barrel. In case the lands are from bullets from different barrels they are not matches.

Questions to be Answered There are a few key relationships we are trying to determine when comparing two Barrel-Bullet-Land IDs: - Matches between questioned and known bullets - Matches between questioned and other questioned bullets - Same lands between different bullets from the same barrel. - Same lands between any questioned bullets with any other bullet.

# Determining Ground Truth
# These features were obtained from the 2019 summer Bullet Project
# Features were extracted from Bullet-Barrel-Land scans using the x3ptools & bulletxtrctr packages

features <- Phoenix_Comparisons %>% 
  select(-cms2_per_mm, -cms_per_mm, -lag_mm, -length_mm, 
         -matches_per_mm, -mismatches_per_mm, -non_cms_per_mm, -rfscore)

features <- features %>% mutate(
  samesource = ifelse(BarrelA==BarrelB & BulletA == BulletB, LandA==LandB, NA),
  samebarrel = BarrelA == BarrelB,
  samebarrel = ifelse((BarrelA == "Unknown" | BarrelB == "Unknown") & (BulletA != BulletB), NA, samebarrel), 
  samesource = ifelse(!samebarrel, FALSE, samesource)
  )

features <- features %>% mutate(
  comparison = paste0(pmin(Bullet1, Bullet2)," vs ", pmax(Bullet1, Bullet2))
)

Explanation We have created three new features: samesource - 1/6 between Barrel-Bullet-Land Scores samebarrel - Same Barrels when being compared or not the same Barrels when being compared comparison - Essential an ID column saying which Barrel-Bullet-Lands are being compared

head(features, 10)
## # A tibble: 10 x 24
##    Bullet1 Bullet2   ccf   cms  cms2      D   lag length matches mismatches
##    <chr>   <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>   <dbl>      <dbl>
##  1 A9-B1-1 A9-B1-1 1        39    19 0          0   2821      39          0
##  2 A9-B1-1 A9-B1-2 0.504     2     3 0.0320  -575   2716       5         31
##  3 A9-B1-1 A9-B1-3 0.337     2     4 0.0341   579   2663       7         28
##  4 A9-B1-1 A9-B1-4 0.339     3     2 0.0428  -386   2551       7         28
##  5 A9-B1-1 A9-B1-5 0.227     2     2 0.0341   497   2668       7         31
##  6 A9-B1-1 A9-B1-6 0.292     2     2 0.0303  -301   2684       6         30
##  7 A9-B1-1 A9-B2-1 0.539     2     2 0.0283  -640   2778       5         34
##  8 A9-B1-1 A9-B2-2 0.272     4     5 0.0382  -580   2718       9         30
##  9 A9-B1-1 A9-B2-3 0.365     3     2 0.0335   488   2821       3         35
## 10 A9-B1-1 A9-B2-4 0.187     1     0 0.0268    -4   2821       2         18
## # … with 14 more variables: non_cms <dbl>, overlap <dbl>, rough_cor <dbl>,
## #   sd_D <dbl>, sum_peaks <dbl>, BarrelA <chr>, BarrelB <chr>,
## #   BulletA <chr>, BulletB <chr>, LandA <chr>, LandB <chr>,
## #   samesource <lgl>, samebarrel <lgl>, comparison <chr>
features <- features %>% select(-cms2, -lag, -length) # Cleaning for future exploration

head(features, 10)
## # A tibble: 10 x 21
##    Bullet1 Bullet2   ccf   cms      D matches mismatches non_cms overlap
##    <chr>   <chr>   <dbl> <dbl>  <dbl>   <dbl>      <dbl>   <dbl>   <dbl>
##  1 A9-B1-1 A9-B1-1 1        39 0           39          0       0   1    
##  2 A9-B1-1 A9-B1-2 0.504     2 0.0320       5         31      10   0.788
##  3 A9-B1-1 A9-B1-3 0.337     2 0.0341       7         28      12   0.842
##  4 A9-B1-1 A9-B1-4 0.339     3 0.0428       7         28      11   0.849
##  5 A9-B1-1 A9-B1-5 0.227     2 0.0341       7         31      10   0.871
##  6 A9-B1-1 A9-B1-6 0.292     2 0.0303       6         30      10   0.888
##  7 A9-B1-1 A9-B2-1 0.539     2 0.0283       5         34      13   0.770
##  8 A9-B1-1 A9-B2-2 0.272     4 0.0382       9         30      14   0.787
##  9 A9-B1-1 A9-B2-3 0.365     3 0.0335       3         35      22   0.827
## 10 A9-B1-1 A9-B2-4 0.187     1 0.0268       2         18      13   0.999
## # … with 12 more variables: rough_cor <dbl>, sd_D <dbl>, sum_peaks <dbl>,
## #   BarrelA <chr>, BarrelB <chr>, BulletA <chr>, BulletB <chr>,
## #   LandA <chr>, LandB <chr>, samesource <lgl>, samebarrel <lgl>,
## #   comparison <chr>

Notes Part 2 We are going to calculate: - “bestlandmatch” using SAM scores to determine which lands match the best between any two bullets

f2nest <- features %>%
  group_by(BarrelA, BulletA, BarrelB, BulletB) %>% nest()

f3nest <- f2nest %>% mutate(
  sam_ccf = data %>% purrr::map_dbl(.f=function(d) 
    max(bulletxtrctr::compute_average_scores(d$LandA, d$LandB, d$ccf, addNA = TRUE)))
  )
head(f3nest)
## # A tibble: 6 x 6
## # Groups:   BarrelA, BarrelB, BulletA, BulletB [6]
##   BarrelA BarrelB BulletA BulletB            data sam_ccf
##   <chr>   <chr>   <chr>   <chr>   <list<df[,17]>>   <dbl>
## 1 A9      A9      B1      B1            [36 × 17]   1    
## 2 A9      A9      B2      B1            [36 × 17]   0.883
## 3 A9      A9      B3      B1            [36 × 17]   0.771
## 4 C8      A9      B1      B1            [30 × 17]   0.399
## 5 C8      A9      B2      B1            [36 × 17]   0.403
## 6 C8      A9      B3      B1            [36 × 17]   0.371
f3nest <- f3nest %>% mutate(
  data = data %>% purrr::map(.f = function(d) {
    d$bestlandmatch = bulletxtrctr::bullet_to_land_predict(d$LandA, d$LandB, d$ccf, difference = -1, alpha = 1, addNA = FALSE)
    d
  })
)
head(f3nest)
## # A tibble: 6 x 6
## # Groups:   BarrelA, BarrelB, BulletA, BulletB [6]
##   BarrelA BarrelB BulletA BulletB data               sam_ccf
##   <chr>   <chr>   <chr>   <chr>   <list>               <dbl>
## 1 A9      A9      B1      B1      <tibble [36 × 18]>   1    
## 2 A9      A9      B2      B1      <tibble [36 × 18]>   0.883
## 3 A9      A9      B3      B1      <tibble [36 × 18]>   0.771
## 4 C8      A9      B1      B1      <tibble [30 × 18]>   0.399
## 5 C8      A9      B2      B1      <tibble [36 × 18]>   0.403
## 6 C8      A9      B3      B1      <tibble [36 × 18]>   0.371
f3nest <- f3nest %>% mutate(
  data = data %>% purrr::map(.f = function(d) {
    d$sameland_pred = bulletxtrctr::bullet_to_land_predict(d$LandA, d$LandB, d$ccf, difference = 0.1, alpha = 0.05, addNA = FALSE)
    d
  })
)
head(f3nest)
## # A tibble: 6 x 6
## # Groups:   BarrelA, BarrelB, BulletA, BulletB [6]
##   BarrelA BarrelB BulletA BulletB data               sam_ccf
##   <chr>   <chr>   <chr>   <chr>   <list>               <dbl>
## 1 A9      A9      B1      B1      <tibble [36 × 19]>   1    
## 2 A9      A9      B2      B1      <tibble [36 × 19]>   0.883
## 3 A9      A9      B3      B1      <tibble [36 × 19]>   0.771
## 4 C8      A9      B1      B1      <tibble [30 × 19]>   0.399
## 5 C8      A9      B2      B1      <tibble [36 × 19]>   0.403
## 6 C8      A9      B3      B1      <tibble [36 × 19]>   0.371
f3nest <- f3nest %>% mutate(
  samebarrel_pred = data %>% purrr::map_dbl(.f = function(d)
    max(d$sameland_pred))
)
head(f3nest)
## # A tibble: 6 x 7
## # Groups:   BarrelA, BarrelB, BulletA, BulletB [6]
##   BarrelA BarrelB BulletA BulletB data              sam_ccf samebarrel_pred
##   <chr>   <chr>   <chr>   <chr>   <list>              <dbl>           <dbl>
## 1 A9      A9      B1      B1      <tibble [36 × 19…   1                   1
## 2 A9      A9      B2      B1      <tibble [36 × 19…   0.883               1
## 3 A9      A9      B3      B1      <tibble [36 × 19…   0.771               1
## 4 C8      A9      B1      B1      <tibble [30 × 19…   0.399               0
## 5 C8      A9      B2      B1      <tibble [36 × 19…   0.403               0
## 6 C8      A9      B3      B1      <tibble [36 × 19…   0.371               0

Notes on Newly created features bestlandmatch and sameland_pred are both nested currently in the data column. This column is a tibble. samebarrel_pred is showing the lands for each barrel comparisons of which have the higher sameland_pred scores. It seems to be the first 3 lands between BulletA and BulletB.

##Check Predictions - Now use "bestlandmatch to identify matching lands betweem different bullets from the same barrel

f3long <- f3nest %>% 
  filter(BarrelA == BarrelB, BulletA != BulletB, BarrelA != "Unknown") %>%
    unnest(data) # Only looking at Barrels that are the same
head(f3long)
## # A tibble: 6 x 25
## # Groups:   BarrelA, BarrelB, BulletA, BulletB [1]
##   BarrelA BarrelB BulletA BulletB Bullet1 Bullet2   ccf   cms       D
##   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <dbl> <dbl>   <dbl>
## 1 A9      A9      B2      B1      A9-B1-1 A9-B2-1 0.539     2 0.0283 
## 2 A9      A9      B2      B1      A9-B1-1 A9-B2-2 0.272     4 0.0382 
## 3 A9      A9      B2      B1      A9-B1-1 A9-B2-3 0.365     3 0.0335 
## 4 A9      A9      B2      B1      A9-B1-1 A9-B2-4 0.187     1 0.0268 
## 5 A9      A9      B2      B1      A9-B1-1 A9-B2-5 0.362     5 0.0267 
## 6 A9      A9      B2      B1      A9-B1-1 A9-B2-6 0.927    12 0.00757
## # … with 16 more variables: matches <dbl>, mismatches <dbl>,
## #   non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## #   sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## #   samebarrel <lgl>, comparison <chr>, bestlandmatch <lgl>,
## #   sameland_pred <lgl>, sam_ccf <dbl>, samebarrel_pred <dbl>

###Let’s Plot some Heat Maps

f3long %>%
  ggplot(aes(x = LandA, y = LandB, fill = ccf))+
  geom_tile()+
  facet_wrap(~BarrelA+BulletA)+
  scale_fill_gradient2(low = "darkgrey", high = "darkorange", midpoint = 0.5)+
  scale_colour_manual(values = "darkorange")+
  geom_point(colour = "midnightblue", data = f3long %>% filter(bestlandmatch))

f3long %>%
  ggplot(aes(x = LandA, y = LandB, fill = ccf))+
  geom_tile()+
  facet_wrap(~BarrelA+BulletA+BulletB)+
  scale_fill_gradient2(low = "darkgrey", high = "darkorange", midpoint = 0.5)+
  scale_colour_manual(values = "darkorange")+
  geom_point(colour = "midnightblue", data = f3long %>% filter(bestlandmatch))

#Distribution of best land matches from f3long
table(f3long$bestlandmatch)
## 
## FALSE  TRUE 
##  1400   280
#Distribution of samesource
table(features$samesource)
## 
## FALSE  TRUE 
## 18642   202
#Distribution of samebarrel
table(features$samebarrel)
## 
## FALSE  TRUE 
## 17642  2882

There seems to be a small amount of missing data. Barrel C8, Bullet B1, Land 3 is missing. Barrel U1, Bullet B3, Land 5 is missing.

Investigate the missing data Barrels C8 and U1 — id: C8-B1-3 and U1-B3-5 are missing in the original phoenix data_set

#Eye-Catchers(Best Land Matches that are not high ccf values):

Barrel F6: B1|B3(4,3), B2|B3(2,3) Barrel L5: B1|B2(1,2), B2|B3(2,1) Barrel M2: B1|B2(1,2), B2|B3(2,5) Barrel R3: B1|B3(4,5)+(5,6) [Weird but also same exact ccf values but differnt bullet comparisons] B2|B3(4,5)+(5,6) Barrel U1: B1|B2(1,3)+(2,4), B1|B3(2,6)

F6-B1-4 vs F6-B3-3: samebarrel = TRUE, bestlandmatch = TRUE, sameland_pred = TRUE, ccf = 0.52, sam_ccf = 0.72, (matches and mismatches both = 18?, overlap = 1.00)

F6-B2-2 vs F6-B3-3: samebarrel = TRUE, beslandmatch = TRUE, sameland_pred = TRUE, ccf = 0.49, sam_ccf = 0.71, (matches = 28, mismatches = 11, overlap = 0.99)

# F6-B1-4 and F6-B3-3
# F6-B2-2 and F6-B3-3

image(Phoenix$x3p[[39]])

image(Phoenix$x3p[[50]])

image(Phoenix$x3p[[43]])

image(Phoenix$x3p[[50]])

# F6-B1-4 and F6-B2-2 seem to be in very bad shape
#### Barrel M2: B1|B2(1,2), B2|B3(2,5)

# M2-B1-1 and M2-B2-2
# M2-B2-2 and M2-B2-5

image(Phoenix$x3p[[72]])

image(Phoenix$x3p[[79]])

image(Phoenix$x3p[[79]])
image(Phoenix$x3p[[82]])

#Lands 1, 2, and 5 seem to have similar damage near the heal of the bullet
#### Barrel R3: B1|B3(4,5)+(5,6) [Weird but also same exact ccf values but differnt bullet comparisons] B2|B3(4,5)+(5,6)

# R3-B1-4 and R3-B3-5
# R3-B1-5 and R3-B3-6
# R3-B2-4 and R3-B3-5
# R3-B2-5 and R3-B3-6

image(Phoenix$x3p[[111]])

image(Phoenix$x3p[[124]])

image(Phoenix$x3p[[112]])

image(Phoenix$x3p[[125]])

image(Phoenix$x3p[[117]])

image(Phoenix$x3p[[124]])

image(Phoenix$x3p[[118]])

image(Phoenix$x3p[[125]])

features <- f3nest %>% unnest(data)

features <- features %>% mutate(
  samesource = ifelse(BarrelA == BarrelB & BulletA != BulletB & BarrelA != "Unknown", bestlandmatch, samesource)
)
head(features, 10)
## # A tibble: 10 x 25
## # Groups:   BarrelA, BarrelB, BulletA, BulletB [1]
##    BarrelA BarrelB BulletA BulletB Bullet1 Bullet2   ccf   cms      D
##    <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <dbl> <dbl>  <dbl>
##  1 A9      A9      B1      B1      A9-B1-1 A9-B1-1 1        39 0     
##  2 A9      A9      B1      B1      A9-B1-1 A9-B1-2 0.504     2 0.0320
##  3 A9      A9      B1      B1      A9-B1-1 A9-B1-3 0.337     2 0.0341
##  4 A9      A9      B1      B1      A9-B1-1 A9-B1-4 0.339     3 0.0428
##  5 A9      A9      B1      B1      A9-B1-1 A9-B1-5 0.227     2 0.0341
##  6 A9      A9      B1      B1      A9-B1-1 A9-B1-6 0.292     2 0.0303
##  7 A9      A9      B1      B1      A9-B1-2 A9-B1-1 0.504     2 0.0320
##  8 A9      A9      B1      B1      A9-B1-2 A9-B1-2 1        41 0     
##  9 A9      A9      B1      B1      A9-B1-2 A9-B1-3 0.367     1 0.0386
## 10 A9      A9      B1      B1      A9-B1-2 A9-B1-4 0.370     2 0.0330
## # … with 16 more variables: matches <dbl>, mismatches <dbl>,
## #   non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## #   sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## #   samebarrel <lgl>, comparison <chr>, bestlandmatch <lgl>,
## #   sameland_pred <lgl>, sam_ccf <dbl>, samebarrel_pred <dbl>
#Description of features same source, FALSE and TRUE both increased in entries
table(features$samesource)
## 
## FALSE  TRUE 
## 20042   482
f3nest <- features %>%
  group_by(comparison, BarrelA, BulletA, BarrelB, 
           BulletB, samebarrel, samebarrel_pred, sam_ccf,
           Bullet1, Bullet2) %>% nest()

Matches with questioned bullets

Any matches with questioned bullets are based on predictions only. Ground truth can only be determined in collaboration with the owners of the study.

This affects all of the following open questions: -matches between questioned and known bullets - matches between questioned and other questioned bullets - same lands between any questioned bullets woth any other bullets

In comparing known samesource(base on bootstrap) we expect to see orange dots(predicted samesource) for known samesource and gray dots(predicted different source) for known different source comparisons $(*)

f3nest <- f3nest %>% mutate(
  truth = c("different-source", "same-source")[samebarrel+1],
  truth = ifelse(is.na(samebarrel), "Unknown", truth),
  prediction = c("different-source", "same-source")[samebarrel_pred+1]
)
# plot $(*)

f3nest %>% group_by(comparison) %>%
  summarise(sam_ccf = mean(sam_ccf),
            truth = truth[1],
            prediction = prediction[1]) %>%
  ggplot(aes(x = sam_ccf, y = truth, colour = prediction))+
  geom_jitter()+
  theme_bw()+
  scale_colour_manual(values = c("darkgrey", "darkorange"))

Graph Analysis Looking at the graph above we can see many points plotted but what is being described here? We have 3 sources which are “same-source”, “different-source”, and “Unknown.”

Our results look very good. We would expect to see same-source points plotted around higher sam_ccf scores and different-source points plotted at the lower end of sam_ccf scores. The Predicted “same-source” points are ploted around the true “same-source” points. The predicted “different-source” points are plotted arund the true “different-source” points. For the true “unknown” barrels in question, we see a mix of predicted “different source” and “same-source” points. There doesn’t seem to be any predicted “different-source”" points plotted around the true “same-source” area and vice versus. These results are exactly what we were hoping for.

#There does not seem to be any recognized truth that is dofferent than the predictions

f3nest %>% filter(truth == "different-source" & prediction == "same-source")
## # A tibble: 0 x 13
## # Groups:   BarrelA, BarrelB, BulletA, BulletB, Bullet1, Bullet2,
## #   samebarrel, comparison, sam_ccf, samebarrel_pred [0]
## # … with 13 variables: BarrelA <chr>, BarrelB <chr>, BulletA <chr>,
## #   BulletB <chr>, Bullet1 <chr>, Bullet2 <chr>, samebarrel <lgl>,
## #   comparison <chr>, sam_ccf <dbl>, samebarrel_pred <dbl>,
## #   data <list<df[,15]>>, truth <chr>, prediction <chr>
#Filter for questioned bullets
questioned <- f3nest %>% filter(BarrelB == "Unknown")
table(questioned$BarrelA)
## 
##      A9      C8      F6      L5      M2      P7      R3      U1 Unknown 
##    1080    1020    1080    1080    1080    1080    1080    1020    3600
questioned <- questioned %>%
  ungroup(BarrelA) %>%
  mutate(
    BarrelA = factor(factor(BarrelA), levels = c("A9", "C8", "F6", "L5", "M2", "P7", "R3", "U1", "Unknown")),
    BulletB = factor(BulletB, levels = rev(c("B", "E", "H", "J", "K", "N", "Q", "T", "Y", "Z"))),
    BulletA = factor(BulletA, levels = c("B1", "B2", "B3", rev(levels(BulletB))))
  ) 



questioned %>%
  ggplot(aes(x = BulletA, y = BulletB, fill=sam_ccf)) + 
  geom_tile(data = questioned) +
  scale_fill_gradient2(low="darkgrey", high="darkorange", midpoint=.5) +
  scale_colour_manual(values=c("darkorange")) +
  facet_grid(.~BarrelA, space = "free", scales="free") +
  theme_bw() +
  geom_point(colour = "midnightblue", data = questioned %>% filter(samebarrel_pred==1))

The figure suggests that mostly, we should be able to accept the same-source predictions for each of the barrels as ground truth. In particular, concistency in the matches between a questioned bullet and a single barrel provide strong evidence in the correctness of a prediction: ideally, we would like to see a match between - a questioned bullet and all three bullets of a single barrel - questioned bullets that match to the same barrel

This is true for all unknown bullets with the exception of bullets Q, Y, and Z. Bullets Q, Y, and Z match to none of the bullets of any barrel.

f3nest <- f3nest %>% ungroup(samebarrel) %>%
  mutate(
  samebarrel = ifelse(BarrelA == "Unknown" & (BulletA != BulletB),
                      samebarrel_pred == 1, samebarrel)
)
f3nest <- f3nest %>% mutate(
  samebarrel = ifelse(BarrelB == "Unknown" & (BulletA != BulletB),
                      samebarrel_pred == 1, samebarrel)
)
head(f3nest)
## # A tibble: 6 x 13
##   BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 samebarrel comparison
##   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <lgl>      <chr>     
## 1 A9      A9      B1      B1      A9-B1-1 A9-B1-1 TRUE       A9-B1-1 v…
## 2 A9      A9      B1      B1      A9-B1-1 A9-B1-2 TRUE       A9-B1-1 v…
## 3 A9      A9      B1      B1      A9-B1-1 A9-B1-3 TRUE       A9-B1-1 v…
## 4 A9      A9      B1      B1      A9-B1-1 A9-B1-4 TRUE       A9-B1-1 v…
## 5 A9      A9      B1      B1      A9-B1-1 A9-B1-5 TRUE       A9-B1-1 v…
## 6 A9      A9      B1      B1      A9-B1-1 A9-B1-6 TRUE       A9-B1-1 v…
## # … with 5 more variables: sam_ccf <dbl>, samebarrel_pred <dbl>,
## #   data <list<df[,15]>>, truth <chr>, prediction <chr>
f2 <- f3nest %>% unnest(data)
f2 <- f2 %>% mutate(
  samesource = ifelse(is.na(samesource) & !samebarrel, FALSE, samesource),
  samesource = ifelse(is.na(samesource) & samebarrel, bestlandmatch, samesource)
)
head(f2)
## # A tibble: 6 x 27
##   BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 samebarrel comparison
##   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <lgl>      <chr>     
## 1 A9      A9      B1      B1      A9-B1-1 A9-B1-1 TRUE       A9-B1-1 v…
## 2 A9      A9      B1      B1      A9-B1-1 A9-B1-2 TRUE       A9-B1-1 v…
## 3 A9      A9      B1      B1      A9-B1-1 A9-B1-3 TRUE       A9-B1-1 v…
## 4 A9      A9      B1      B1      A9-B1-1 A9-B1-4 TRUE       A9-B1-1 v…
## 5 A9      A9      B1      B1      A9-B1-1 A9-B1-5 TRUE       A9-B1-1 v…
## 6 A9      A9      B1      B1      A9-B1-1 A9-B1-6 TRUE       A9-B1-1 v…
## # … with 19 more variables: sam_ccf <dbl>, samebarrel_pred <dbl>,
## #   ccf <dbl>, cms <dbl>, D <dbl>, matches <dbl>, mismatches <dbl>,
## #   non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## #   sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## #   bestlandmatch <lgl>, sameland_pred <lgl>, truth <chr>,
## #   prediction <chr>
questioned <- f2 %>% filter(BarrelB == "Unknown")

questioned <- questioned %>%
  ungroup(BarrelA) %>%
  mutate(
    BarrelA = factor(factor(BarrelA), levels = c("A9", "C8", "F6", "L5", "M2", "P7", "R3", "U1", "Unknown")),
    BulletB = factor(BulletB, levels = rev(c("B", "E", "H", "J", "K", "N", "Q", "T", "Y", "Z"))),
    BulletA = factor(BulletA, levels = c("B1", "B2", "B3", rev(levels(BulletB))))
  ) 


questioned %>%
  ggplot(aes(x = LandA, y = LandB, fill=ccf)) + geom_tile() +
  scale_fill_gradient2(low="darkgrey", high="darkorange", midpoint=.5) +
  facet_grid(BulletB~BarrelA+BulletA) +
  geom_point(colour = "red", data = questioned %>% filter(samesource))

table(f2$samesource)
## 
## FALSE  TRUE 
## 40072   732
f2 <- f2 %>% mutate(
  samesource = ifelse(questioned$samesource == "TRUE" & f2$samesource == "FALSE", TRUE, samesource)
)
## Warning in questioned$samesource == "TRUE" & f2$samesource == "FALSE":
## longer object length is not a multiple of shorter object length
table(f2$samesource)
## 
## FALSE  TRUE 
## 39447  1357

Accuracy for models is around the same. Now we will try and find miss classified ground truth results by examining signatures between Barrel_Bullet_Land comparisons.

Signatures_Phoenix %>%
  filter(Barrel != "Unknown") %>%
    ggplot(data = ., aes(x = x, y = sig, color = Land)) + 
      geom_line()+
        facet_grid(paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel))+
          theme_bw()+
            ggtitle("Signatures for Known Barrels")
## Warning: Removed 6796 rows containing missing values (geom_path).

Signatures_Phoenix %>%
  filter(Barrel == "Unknown") %>%
    ggplot(data = ., aes(x = x, y = sig, color = Land)) + 
      geom_line()+
        facet_wrap(paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel))+
          theme_bw()+
            ggtitle("Signatures for Questioned Bullets")
## Warning: Removed 4604 rows containing missing values (geom_path).

# High ccf & samesource('FALSE')


f2 %>% 
  filter(ccf >= 0.70 & samesource == "FALSE")
## # A tibble: 12 x 27
##    BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 samebarrel comparison
##    <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <lgl>      <chr>     
##  1 P7      L5      B3      B1      L5-B1-3 P7-B3-5 FALSE      L5-B1-3 v…
##  2 Unknown L5      J       B1      L5-B1-3 Unknow… FALSE      L5-B1-3 v…
##  3 U1      L5      B1      B2      L5-B2-2 U1-B1-1 FALSE      L5-B2-2 v…
##  4 Unknown M2      B       B2      M2-B2-4 Unknow… FALSE      M2-B2-4 v…
##  5 L5      P7      B1      B3      P7-B3-5 L5-B1-3 FALSE      L5-B1-3 v…
##  6 Unknown P7      T       B3      P7-B3-5 Unknow… FALSE      P7-B3-5 v…
##  7 L5      U1      B2      B1      U1-B1-1 L5-B2-2 FALSE      L5-B2-2 v…
##  8 M2      Unknown B2      B       Unknow… M2-B2-4 FALSE      M2-B2-4 v…
##  9 L5      Unknown B1      J       Unknow… L5-B1-3 FALSE      L5-B1-3 v…
## 10 Unknown Unknown T       J       Unknow… Unknow… FALSE      Unknown-J…
## 11 P7      Unknown B3      T       Unknow… P7-B3-5 FALSE      P7-B3-5 v…
## 12 Unknown Unknown J       T       Unknow… Unknow… FALSE      Unknown-J…
## # … with 19 more variables: sam_ccf <dbl>, samebarrel_pred <dbl>,
## #   ccf <dbl>, cms <dbl>, D <dbl>, matches <dbl>, mismatches <dbl>,
## #   non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## #   sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## #   bestlandmatch <lgl>, sameland_pred <lgl>, truth <chr>,
## #   prediction <chr>
Signatures_Phoenix %>% 
  filter(id %in% c("L5-B1-3", "P7-B3-5")) %>% #Not a Match
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) + 
  geom_line()+
    theme_bw()+
  scale_color_brewer(palette = "Dark2")+
    ggtitle("L5-B1-3 VS P7-B3-5")
## Warning: Removed 1924 rows containing missing values (geom_path).

Signatures_Phoenix %>% 
  filter(id %in% c("L5-B1-3", "Unknown-J-5")) %>% #Possibly a Match(sum peaks suggest strong match)
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) + 
  geom_line()+
    theme_bw()+
  scale_color_brewer(palette = "Dark2")+
    ggtitle("L5-B1-3 VS Unknown-J-5")
## Warning: Removed 1411 rows containing missing values (geom_path).

Signatures_Phoenix %>% 
  filter(id %in% c("L5-B2-2", "U1-B1-1")) %>% #Possibly a Match(sum peaks suggest strong match)
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) + 
  geom_line()+
    theme_bw()+
  scale_color_brewer(palette = "Dark2")+
    ggtitle("L5-B2-2 vs U1-B1-1")
## Warning: Removed 1774 rows containing missing values (geom_path).

Signatures_Phoenix %>% 
  filter(id %in% c("M2-B2-4", "Unknown-B-2")) %>% #Match(sum peaks suggest strong match)
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) + 
  geom_line()+
    theme_bw()+
  scale_color_brewer(palette = "Dark2")+
    ggtitle("M2-B2-4 vs Unknown-B-2")
## Warning: Removed 1765 rows containing missing values (geom_path).

Signatures_Phoenix %>% 
  filter(id %in% c("P7-B3-5", "Unknown-T-3")) %>% #Not a Match
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) + 
  geom_line()+
    theme_bw()+
  scale_color_brewer(palette = "Dark2")+
    ggtitle("P7-B3-5 vs Unknown-T-3")
## Warning: Removed 1882 rows containing missing values (geom_path).

Signatures_Phoenix %>% 
  filter(id %in% c("Unknown-J-5", "Unknown-T-3")) %>% #Not a Match
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) + 
  geom_line()+
    theme_bw()+
  scale_color_brewer(palette = "Dark2")+
    ggtitle("Unknown-J-5 vs Unknown-T-3")
## Warning: Removed 1369 rows containing missing values (geom_path).

# Analyze signatures for these comparisons
f2 <- f2 %>% mutate(
  samesource = ifelse(f2$comparison == "M2-B2-4 vs Unknown-B-2" & f2$samesource == "FALSE", TRUE, samesource)
)